# coding:utf8
import findspark

# findspark.init() is deliberately called before the pyspark import below:
# it locates the Spark installation and adds its Python libraries to
# sys.path, so moving the pyspark import above this call may break it.
findspark.init()
from pyspark.sql import SparkSession

if __name__ == '__main__':
    # 0. Build the SparkSession, the entry point of the execution environment.
    spark = (
        SparkSession.builder
        .appName("test")
        .master("local[*]")
        .getOrCreate()
    )
    sc = spark.sparkContext

    # Read the Avro schema file (.avsc).
    #
    # "avsc" is not a Spark data source, so format("avsc") fails at load()
    # with "Failed to find data source". An .avsc file is an Avro *schema*,
    # i.e. a JSON document that normally spans several lines, so it is read
    # with the built-in JSON source in multiLine mode. The CSV-only options
    # ("sep", "header") have no meaning here and were dropped.
    #
    # NOTE(review): if the intent was to read Avro *data*, point the path at
    # the .avro container file and use format("avro") instead (requires the
    # external spark-avro module on the classpath) -- confirm with the owner.
    df = (
        spark.read.format("json")
        .option("multiLine", True)
        .option("encoding", "utf-8")
        .load("hdfs://bigdata:9820/pySpark_input/users.avsc")
    )
    df.printSchema()
    df.show()
